import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from scipy import stats
import plotly.offline as py
import plotly.graph_objs as go
from ipywidgets import widgets, interactive
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvas
# Load the pre-exported CSV tables used below (paths are relative to the
# current working directory).
# NOTE(review): later statements reference `s2_filled`, which is not assigned
# anywhere in the visible source -- presumably it corresponds to one of these
# frames; confirm where it is defined.
filled_sentinel = pd.read_csv('./data/csvs/filled_sentinel.csv')
viirs_df = pd.read_csv('./data/csvs/viirs.csv')
filled_sentinel_final = pd.read_csv('./data/csvs/filled_sentinel_final_class.csv')
filled_sentinel_uk = pd.read_csv('./data/csvs/filled_sentinel_uk_class.csv')
filled_sentinel_philly_bldgs = pd.read_csv('./data/csvs/filled_sentinel_philly_bldgs_class.csv')
# Bare expression: in a notebook this renders the frame; the rendered table
# (date, NDVI, IBI, NDBI, UI columns) follows.
s2_filled
| date | NDVI | IBI | NDBI | UI | |
|---|---|---|---|---|---|
| 0 | 2020-01-04 | 0.549254 | -1.775703e+00 | -0.231373 | -0.429414 |
| 1 | 2020-01-04 | 0.467781 | -4.107221e+07 | -0.187610 | -0.366849 |
| 2 | 2020-01-04 | 0.467781 | -4.107221e+07 | -0.187610 | -0.366849 |
| 3 | 2020-01-06 | 0.695607 | -6.752086e+00 | -0.412036 | -0.588348 |
| 4 | 2020-01-06 | 0.467781 | -4.107221e+07 | -0.187610 | -0.366849 |
| ... | ... | ... | ... | ... | ... |
| 1014 | 2022-04-28 | 0.467781 | -4.107221e+07 | -0.187610 | -0.366849 |
| 1015 | 2022-04-28 | 0.467781 | -4.107221e+07 | -0.187610 | -0.366849 |
| 1016 | 2022-04-30 | 0.315161 | -8.190977e+00 | -0.233853 | -0.407168 |
| 1017 | 2022-04-30 | 0.467781 | -4.107221e+07 | -0.187610 | -0.366849 |
| 1018 | 2022-04-30 | 0.467781 | -4.107221e+07 | -0.187610 | -0.366849 |
1019 rows × 5 columns
# Bare expression: in a notebook this renders the frame; the rendered table
# (reducer, date, avg_rad columns) follows.
viirs_df
| reducer | date | avg_rad | |
|---|---|---|---|
| 0 | mean | 2020-01-01 | 4.675772 |
| 1 | mean | 2020-02-01 | 4.692925 |
| 2 | mean | 2020-03-01 | 4.352947 |
| 3 | mean | 2020-04-01 | 4.783422 |
| 4 | mean | 2020-05-01 | 4.536045 |
| 5 | mean | 2020-06-01 | 4.577717 |
| 6 | mean | 2020-07-01 | 4.449237 |
| 7 | mean | 2020-08-01 | 4.539433 |
| 8 | mean | 2020-09-01 | 4.442848 |
| 9 | mean | 2020-10-01 | 5.170358 |
| 10 | mean | 2020-11-01 | 4.526725 |
| 11 | mean | 2020-12-01 | 4.377061 |
| 12 | mean | 2021-01-01 | 4.748482 |
| 13 | mean | 2021-02-01 | 5.666021 |
| 14 | mean | 2021-03-01 | 5.804959 |
| 15 | mean | 2021-04-01 | 5.735382 |
| 16 | mean | 2021-05-01 | 6.051599 |
| 17 | mean | 2021-06-01 | 5.547343 |
| 18 | mean | 2021-07-01 | 5.462366 |
| 19 | mean | 2021-08-01 | 5.467300 |
| 20 | mean | 2021-09-01 | 5.535593 |
| 21 | mean | 2021-10-01 | 5.322791 |
| 22 | mean | 2021-11-01 | 4.665055 |
| 23 | mean | 2021-12-01 | 5.529470 |
| 24 | mean | 2022-01-01 | 6.146584 |
| 25 | mean | 2022-02-01 | 6.270673 |
| 26 | mean | 2022-03-01 | 5.708471 |
| 27 | mean | 2022-04-01 | 6.230568 |
def data_qualityCheck(data):
    """Print a quick quality report for a DataFrame.

    The report consists of the number of missing values per column followed
    by the ``DataFrame.info()`` summary (dtypes, non-null counts, memory use).

    Args:
        data: The pandas DataFrame to inspect.

    Returns:
        None. Everything is written to standard output.
    """
    print("Checking data qualitites...")
    # A bare expression inside a function is never displayed, so the null
    # counts were previously computed and thrown away; print them explicitly.
    print("Missing values per column:")
    print(data.isnull().sum())
    data.info()
    print("check finished.")
# Run the quality report on the s2_filled frame; its printed output follows.
data_qualityCheck(s2_filled)
Checking data qualitites... <class 'pandas.core.frame.DataFrame'> RangeIndex: 1019 entries, 0 to 1018 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 1019 non-null object 1 NDVI 1019 non-null float64 2 IBI 1019 non-null float64 3 NDBI 1019 non-null float64 4 UI 1019 non-null float64 dtypes: float64(4), object(1) memory usage: 39.9+ KB check finished.
def mpl_plot(avg, highlight):
    """Draw ``avg`` as a line and overlay ``highlight`` as round markers.

    A standalone ``Figure`` is created directly (not through pyplot) and
    handed back to the caller.

    Args:
        avg: Object exposing ``.plot(ax=...)`` (e.g. a pandas Series).
        highlight: Sized object exposing ``.plot(style=..., ax=...)``; it is
            only drawn when it contains at least one element.

    Returns:
        The ``Figure`` holding the single subplot.
    """
    figure = Figure()
    FigureCanvas(figure)  # not needed in mpl >= 3.1
    axes = figure.add_subplot()
    avg.plot(ax=axes)
    has_points = len(highlight) > 0
    if has_points:
        highlight.plot(style='o', ax=axes)
    return figure
def find_outliers_s2(variable='NDVI', window=30, sigma=10, view_fn=mpl_plot):
    """Flag rolling-window outliers in one ``s2_filled`` column and render them.

    The column is smoothed with a rolling mean over ``window`` rows. A row is
    flagged when the absolute difference between its value and that mean
    exceeds ``sigma`` times the rolling standard deviation (same window) of
    those differences.

    Args:
        variable: Column of the module-level ``s2_filled`` frame to analyse.
        window: Rolling window length, in rows.
        sigma: Multiplier applied to the rolling standard deviation.
        view_fn: Callable receiving ``(rolling_mean, flagged)`` where
            ``flagged`` is the rolling mean restricted to the flagged rows.

    Returns:
        Whatever ``view_fn`` returns.
    """
    series = s2_filled[variable]
    rolling_mean = series.rolling(window=window).mean()
    deviation = series - rolling_mean
    rolling_std = deviation.rolling(window=window).std()
    threshold = rolling_std * sigma
    is_outlier = np.abs(deviation) > threshold
    return view_fn(rolling_mean, rolling_mean[is_outlier])
def find_outliers_viirs(variable='avg_rad', window=30, sigma=10, view_fn=mpl_plot):
    """Flag rolling-window outliers in one ``viirs_df`` column and render them.

    A row counts as an outlier when its distance from the rolling mean is
    larger than ``sigma`` rolling standard deviations of that distance.

    NOTE(review): the ``viirs_df`` table displayed in this notebook has 28
    rows, so with the default ``window=30`` the rolling mean is NaN everywhere
    and nothing is flagged -- callers likely want a smaller window.

    Args:
        variable: Column of the module-level ``viirs_df`` frame to analyse.
        window: Rolling window length, in rows, for both mean and std.
        sigma: Multiplier applied to the rolling standard deviation.
        view_fn: Callable receiving the rolling mean and the rolling-mean
            values at the flagged rows.

    Returns:
        Whatever ``view_fn`` returns.
    """
    column = viirs_df[variable]
    smoothed = column.rolling(window=window).mean()
    offset = column - smoothed
    spread = offset.rolling(window=window).std()
    flagged = np.abs(offset) > spread * sigma
    flagged_points = smoothed[flagged]
    return view_fn(smoothed, flagged_points)
import panel as pn
pn.extension()
# Default interactive view: widgets are inferred from the function defaults.
pn.interact(find_outliers_s2)
# Explicit widget ranges. Only numeric columns are offered in the dropdown:
# the frame also holds an object-dtype 'date' column, and selecting it would
# make rolling().mean() inside find_outliers_s2 fail.
kw = dict(
    window=(1, 60),
    variable=sorted(s2_filled.select_dtypes(include='number').columns),
    sigma=(1, 20),
)
i_s2 = pn.interact(find_outliers_s2, **kw)
# Print the layout tree so the widgets/plot can be addressed by index below.
i_s2.pprint()
Column
[0] Column
[0] Select(name='variable', options=['IBI', 'NDBI', ...], value='NDVI')
[1] IntSlider(end=60, name='window', start=1, value=30)
[2] IntSlider(end=20, name='sigma', start=1, value=10)
[1] Row
[0] Matplotlib(Figure, name='interactive00242')
# Markdown blurb shown next to the plot. The heading previously read
# "Room Occupancy", a leftover from another dataset; this layout shows the
# Sentinel-2 index columns of s2_filled.
text = "<br>\n# Sentinel-2 Indices\nSelect the variable, and the time window for smoothing"
# i_s2[1][0] is the Matplotlib pane; i_s2[0][0] and i_s2[0][1] are the
# variable selector and the window slider (the sigma slider is left out).
# NOTE(review): this Sentinel-2 layout is stored under the name `viirs_p`,
# which a later statement reassigns to the VIIRS layout; the name is kept so
# module-level names do not change.
viirs_p = pn.Row(i_s2[1][0], pn.Column(text, i_s2[0][0], i_s2[0][1]))
viirs_p
# Widget ranges for the VIIRS view. Only numeric columns are offered: the
# frame also contains the text columns 'reducer' and 'date', which cannot be
# smoothed with rolling().mean().
kw2 = dict(
    window=(1, 60),
    variable=sorted(viirs_df.select_dtypes(include='number').columns),
    sigma=(1, 20),
)
# The original passed `find_outliers`, a name that is not defined in this
# file; the VIIRS-specific function is the one that reads viirs_df.
i_viirs = pn.interact(find_outliers_viirs, **kw2)
# Print the layout tree so the widgets/plot can be addressed by index below.
i_viirs.pprint()
Column
[0] Column
[0] Select(name='variable', options=['avg_rad', 'date', ...], value='avg_rad')
[1] IntSlider(end=60, name='window', start=1, value=30)
[2] IntSlider(end=20, name='sigma', start=1, value=10)
[1] Row
[0] Matplotlib(Figure, name='interactive00258')
# Markdown blurb shown next to the plot. The heading previously read
# "Room Occupancy", a leftover from another dataset; this layout shows the
# avg_rad column of viirs_df.
text = "<br>\n# VIIRS Average Radiance\nSelect the variable, and the time window for smoothing"
# i_viirs[1][0] is the Matplotlib pane; i_viirs[0][0] and i_viirs[0][1] are
# the variable selector and the window slider (the sigma slider is left out).
viirs_p = pn.Row(i_viirs[1][0], pn.Column(text, i_viirs[0][0], i_viirs[0][1]))
viirs_p
# One-off, non-interactive run on NDVI with a 12-row window and a 2-sigma
# threshold. `find_outliers` is not defined in this file; NDVI is a column of
# s2_filled, so the Sentinel-2 variant is the matching function.
find_outliers_s2(variable='NDVI', window=12, sigma=2)
# function that compares two indices from the same dataset over the same x
def compare_time_series(data, x, y_1, y_2):
    """Plot two columns of ``data`` against a shared x column as Plotly lines.

    Args:
        data: Object indexable by column label (e.g. a pandas DataFrame).
        x: Label of the column used for the x axis.
        y_1: Label of the first series (drawn in orange).
        y_2: Label of the second series (drawn in green).

    Returns:
        None. The figure is rendered inline via ``plotly.offline.iplot``.
    """
    colours = ('rgb(244, 146, 65)', 'rgb(66, 244, 155)')
    traces = [
        go.Scatter(
            x=data[x],
            y=data[label],
            mode='lines',
            name=str(label),
            line=dict(color=colour, width=2),
        )
        for label, colour in zip((y_1, y_2), colours)
    ]
    layout = dict(
        # f-string formatting stringifies every label; the original
        # concatenated y_2 without str(), which raised TypeError for
        # non-string column labels.
        title=f'Comparison of {y_1} and {y_2} over {x}',
        # Axis titles follow the arguments instead of the hard-coded
        # 'Day number' / 'ndvi', which were wrong for other columns.
        xaxis=dict(title=str(x)),
        yaxis=dict(title=f'{y_1} / {y_2}'),
    )
    fig = dict(data=traces, layout=layout)
    py.iplot(fig, filename='results_demonstrating0')
# Compare the NDVI and NDBI columns of s2_filled over the 'date' column.
compare_time_series(s2_filled,'date','NDVI','NDBI')
import seaborn as sns
import statsmodels.api as sm
from scipy import stats
import plotly.offline as py
import plotly.graph_objs as go
# Plot the pairwise correlation between variables as a seaborn heatmap.
plt.figure(figsize=(12,10))
# Correlate numeric columns only: since pandas 2.0, corr() no longer drops
# non-numeric columns silently and raises if any are present (e.g. a text
# date column). select_dtypes keeps this working on older pandas as well.
cor = filled_sentinel_final.select_dtypes(include='number').corr()
sns.heatmap(cor, annot=True, cmap=plt.cm.Reds)
plt.show()
# Seasonal decomposition of the NDVI column (missing values replaced by the
# column mean first), using a period of 60 observations; s.plot() draws the
# decomposition's built-in matplotlib figure.
s = sm.tsa.seasonal_decompose(filled_sentinel_final['NDVI'].fillna(filled_sentinel_final['NDVI'].mean()), period=60)
s.plot()
# Decompose NDVI (period of 60 observations) and draw the four components
# with Plotly. Missing values are mean-filled first, as in the matplotlib
# decomposition earlier in the file: seasonal_decompose rejects NaNs, and the
# original call here skipped that step.
s = sm.tsa.seasonal_decompose(
    filled_sentinel_final['NDVI'].fillna(filled_sentinel_final['NDVI'].mean()),
    period=60,
)
# Each component is plotted against its positional index (0..n-1).
trend = go.Scatter(x=np.arange(0, len(s.trend), 1), y=s.trend, mode='lines', name='Trend',
                   line=dict(color=('rgb(244, 146, 65)'), width=4))
seasonal = go.Scatter(x=np.arange(0, len(s.seasonal), 1), y=s.seasonal, mode='lines', name='Seasonal',
                      line=dict(color=('rgb(66, 244, 155)'), width=2))
residual = go.Scatter(x=np.arange(0, len(s.resid), 1), y=s.resid, mode='lines', name='Residual',
                      line=dict(color=('rgb(209, 244, 66)'), width=2))
observed = go.Scatter(x=np.arange(0, len(s.observed), 1), y=s.observed, mode='lines', name='Observed',
                      line=dict(color=('rgb(66, 134, 244)'), width=2))
w_data = [trend, seasonal, residual, observed]
# The y axis was labelled 'Price, USD' (left over from another dataset); the
# plotted quantity is NDVI.
layout = dict(title='Seasonal decomposition', xaxis=dict(title='Time'), yaxis=dict(title='NDVI'))
fig = dict(data=w_data, layout=layout)
py.iplot(fig, filename='seasonal_decomposition')
# Autocorrelation (top panel) and partial autocorrelation (bottom panel) of
# the NDVI column, each up to 60 lags, stacked in one 15x7 figure.
plt.figure(figsize=(15,7))
# 211 = 2 rows, 1 column, first (upper) axes.
ax = plt.subplot(211)
# NOTE(review): .squeeze() on a single-column selection looks redundant -- confirm.
sm.graphics.tsa.plot_acf(filled_sentinel_final['NDVI'].squeeze(), lags=60, ax=ax)
# 212 = second (lower) axes; `ax` is rebound so the PACF draws there.
ax = plt.subplot(212)
sm.graphics.tsa.plot_pacf(filled_sentinel_final['NDVI'].squeeze(), lags=60, ax=ax)
plt.tight_layout()
plt.show()